package com.example.webmagic;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Created by Geekkiller on 2017/11/16.
 */

/**
 * Page parser for the crawler.
 *
 * <p>For every downloaded page this processor queues the links found inside the pager
 * element for further crawling, then extracts the author, content and date of an article
 * into a {@code BaozouNews} object that is published under the result field {@code "news"}.
 * Pages on which none of the three values can be found are marked as skipped so that no
 * empty record is published for them.
 */
public class BaoZouProcessor implements PageProcessor {

    /** CSS selector of the pager element whose links are followed. */
    private static final String PAGER_SELECTOR = "Div .pager-content";

    /** XPath of the text of the article author link. */
    private static final String AUTHOR_XPATH = "//a[@class='article-author-name']/text()";

    /** XPath of the {@code data-text} attribute carrying the article content. */
    private static final String CONTENT_XPATH = "//div[@class='article article-text']/@data-text";

    /** XPath of the text of the article date element. */
    private static final String TIME_XPATH = "//span[@class='article-date']/text()";

    /** Name of the result field under which the extracted news object is stored. */
    private static final String NEWS_FIELD = "news";

    // Crawl configuration for the target site (encoding, crawl interval, retry count, etc.).
    // Here: retry times set to 3 and sleep time set to 100 (milliseconds per the WebMagic
    // Site API). Assigned once, hence final.
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Queues the pager links of {@code page} and extracts one news item from it.
     *
     * @param page the downloaded page to parse
     */
    @Override
    public void process(Page page) {
        // Queue the pager links before anything else so that they are followed even when
        // this page turns out to contain no article.
        page.addTargetRequests(page.getHtml().css(PAGER_SELECTOR).links().all());

        String author = extract(page, AUTHOR_XPATH);
        String content = extract(page, CONTENT_XPATH);
        String time = extract(page, TIME_XPATH);

        // Nothing matched at all: flag the page as skipped instead of publishing a
        // BaozouNews whose fields are all null.
        if (author == null && content == null && time == null) {
            page.setSkip(true);
            return;
        }

        BaozouNews news = new BaozouNews();
        news.setAuthor(author);
        news.setContent(content);
        news.setTime(time);
        page.putField(NEWS_FIELD, news);
    }

    /**
     * Returns the crawl configuration used by this processor.
     *
     * @return the {@link Site} instance held by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Evaluates {@code xpath} against the HTML of {@code page} and returns the result as a string.
     *
     * @param page  the page whose HTML is queried
     * @param xpath the XPath expression to evaluate
     * @return the string form of the selection; per the WebMagic Selectable API this is the
     *         first match, or {@code null} when nothing matches
     */
    private static String extract(Page page, String xpath) {
        return page.getHtml().xpath(xpath).toString();
    }
}
